# Read the 2014/2015 CSM workbook from the working directory and replace the
# space in the "Aggregate Followers" header with a dot, so the column can be
# referenced later without backticks.
mdata <- rename(
  readxl::read_xlsx("./2014 and 2015 CSM dataset.xlsx"),
  Aggregate.Followers = `Aggregate Followers`
)

# Print the first rows as a quick check that the import worked.
head(mdata)
## # A tibble: 6 x 14
## Movie Year Ratings Genre Gross Budget Screens Sequel Sentiment Views
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13 S… 2014 6.3 8 9.13e3 4.00e6 45 1 0 3.28e6
## 2 22 J… 2014 7.1 1 1.92e8 5.00e7 3306 2 2 5.83e5
## 3 3 Da… 2014 6.2 1 3.07e7 2.80e7 2872 1 0 3.05e5
## 4 300:… 2014 6.3 1 1.06e8 1.10e8 3470 2 0 4.53e5
## 5 A Ha… 2014 4.7 8 1.73e7 3.50e6 2310 2 0 3.15e6
## 6 A Lo… 2014 4.6 3 2.90e4 5.00e5 NA 1 0 9.11e4
## # … with 4 more variables: Likes <dbl>, Dislikes <dbl>, Comments <dbl>,
## # Aggregate.Followers <dbl>
#glimpse(mdata)
# Open the spreadsheet-style viewer only when a user is at the console.
# View() needs a GUI data viewer, which non-interactive runs (Rscript,
# knitting a report) may not provide, so skip it there instead of failing.
if (interactive()) {
  mdata %>% View()
}
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.1 ✔ purrr 0.2.5
## ✔ tidyr 1.0.0 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks skimr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dataMaid::summarize() masks dplyr::summarize()
## $Movie
## $Movie$identifyMissing
## No problems found.
## $Movie$identifyWhitespace
## No problems found.
## $Movie$identifyLoners
## Note that the following levels have at most five observations: 13 Sins, 22 Jump Street, 3 Days to Kill, 300: Rise of an Empire, A Haunted House 2, A Long Way Off, A Million Ways to Die in the West, A Most Violent Year, A Walk Among the Tombstones, About Last Night (221 additional values omitted).
## $Movie$identifyCaseIssues
## No problems found.
## $Movie$identifyNums
## No problems found.
##
## $Year
## $Year$identifyMissing
## No problems found.
## $Year$identifyWhitespace
## No problems found.
## $Year$identifyLoners
## No problems found.
## $Year$identifyCaseIssues
## No problems found.
## $Year$identifyNums
## No problems found.
##
## $Ratings
## $Ratings$identifyMissing
## No problems found.
## $Ratings$identifyOutliers
## Note that the following possible outlier values were detected: 3.1.
##
## $Genre
## $Genre$identifyMissing
## The following suspected missing value codes enter as regular values: 8, 9.
## $Genre$identifyWhitespace
## No problems found.
## $Genre$identifyLoners
## Note that the following levels have at most five observations: 4, 6, 7.
## $Genre$identifyCaseIssues
## No problems found.
## $Genre$identifyNums
## No problems found.
##
## $Gross
## $Gross$identifyMissing
## No problems found.
## $Gross$identifyOutliers
## Note that the following possible outlier values were detected: 643.
##
## $Budget
## $Budget$identifyMissing
## No problems found.
## $Budget$identifyOutliers
## No problems found.
##
## $Screens
## $Screens$identifyMissing
## No problems found.
## $Screens$identifyOutliers
## Note that the following possible outlier values were detected: 4080, 4151, 4233, 4253, 4274, 4276, 4301, 4324.
##
## $Sequel
## $Sequel$identifyMissing
## No problems found.
## $Sequel$identifyOutliers
## Note that the following possible outlier values were detected: 2, 3, 4, 5, 6, 7.
##
## $Sentiment
## $Sentiment$identifyMissing
## No problems found.
## $Sentiment$identifyOutliers
## Note that the following possible outlier values were detected: -38, -17, -11, -9, -8, -6, -5, -4, -3, -2 (1 additional values omitted).
##
## $Views
## $Views$identifyMissing
## No problems found.
## $Views$identifyOutliers
## Note that the following possible outlier values were detected: 31859569, 32626778.
##
## $Likes
## $Likes$identifyMissing
## No problems found.
## $Likes$identifyOutliers
## Note that the following possible outlier values were detected: 187162, 370552.
##
## $Dislikes
## $Dislikes$identifyMissing
## No problems found.
## $Dislikes$identifyOutliers
## Note that the following possible outlier values were detected: 3439, 3524, 3565, 3812, 4245, 4382, 4752, 5746, 13960.
##
## $Comments
## $Comments$identifyMissing
## No problems found.
## $Comments$identifyOutliers
## Note that the following possible outlier values were detected: 18077, 24919, 38363.
##
## $Aggregate.Followers
## $Aggregate.Followers$identifyMissing
## No problems found.
## $Aggregate.Followers$identifyOutliers
## No problems found.
##
## $Gross_log
## $Gross_log$identifyMissing
## No problems found.
## $Gross_log$identifyOutliers
## Note that the following possible outlier values were detected: 5.4, 5.46, 5.48, 5.5, 5.54, 5.55, 5.56, 5.78, 5.81, 5.82 (4 additional values omitted).
## $Movie
## $Movie$identifyMissing
## No problems found.
## $Movie$identifyWhitespace
## No problems found.
## $Movie$identifyLoners
## Note that the following levels have at most five observations: 13 Sins, 22 Jump Street, 3 Days to Kill, 300: Rise of an Empire, A Haunted House 2, A Long Way Off, A Million Ways to Die in the West, A Most Violent Year, A Walk Among the Tombstones, About Last Night (221 additional values omitted).
## $Movie$identifyCaseIssues
## No problems found.
## $Movie$identifyNums
## No problems found.
##
## $Year
## $Year$identifyMissing
## No problems found.
## $Year$identifyWhitespace
## No problems found.
## $Year$identifyLoners
## No problems found.
## $Year$identifyCaseIssues
## No problems found.
## $Year$identifyNums
## No problems found.
##
## $Ratings
## $Ratings$identifyOutliers
## Note that the following possible outlier values were detected: 3.1.
##
## $Genre
## $Genre$identifyMissing
## The following suspected missing value codes enter as regular values: 8, 9.
## $Genre$identifyWhitespace
## No problems found.
## $Genre$identifyLoners
## Note that the following levels have at most five observations: 4, 6, 7.
## $Genre$identifyCaseIssues
## No problems found.
## $Genre$identifyNums
## No problems found.
##
## $Gross
## $Gross$identifyOutliers
## Note that the following possible outlier values were detected: 643.
##
## $Budget
## $Budget$identifyOutliers
## No problems found.
##
## $Screens
## $Screens$identifyOutliers
## Note that the following possible outlier values were detected: 4080, 4151, 4233, 4253, 4274, 4276, 4301, 4324.
##
## $Sequel
## $Sequel$identifyOutliers
## Note that the following possible outlier values were detected: 2, 3, 4, 5, 6, 7.
##
## $Sentiment
## $Sentiment$identifyOutliers
## Note that the following possible outlier values were detected: -38, -17, -11, -9, -8, -6, -5, -4, -3, -2 (1 additional values omitted).
##
## $Views
## $Views$identifyOutliers
## Note that the following possible outlier values were detected: 31859569, 32626778.
##
## $Likes
## $Likes$identifyOutliers
## Note that the following possible outlier values were detected: 187162, 370552.
##
## $Dislikes
## $Dislikes$identifyOutliers
## Note that the following possible outlier values were detected: 3439, 3524, 3565, 3812, 4245, 4382, 4752, 5746, 13960.
##
## $Comments
## $Comments$identifyOutliers
## Note that the following possible outlier values were detected: 18077, 24919, 38363.
##
## $Aggregate.Followers
## $Aggregate.Followers$identifyOutliers
## No problems found.
##
## $Gross_log
## $Gross_log$identifyOutliers
## Note that the following possible outlier values were detected: 5.4027, 5.4553, 5.4848, 5.5013, 5.5413, 5.553, 5.5607, 5.7838, 5.8081, 5.8201 (4 additional values omitted).
## Warning: package 'PerformanceAnalytics' was built under R version 3.5.2
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
#EDAreg
## Warning: package 'gplots' was built under R version 3.5.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:PerformanceAnalytics':
##
## textplot
## The following object is masked from 'package:stats':
##
## lowess
## corrplot 0.84 loaded
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Classes 'tbl_df', 'tbl' and 'data.frame': 231 obs. of 15 variables:
## $ Movie : chr "13 Sins" "22 Jump Street" "3 Days to Kill" "300: Rise of an Empire" ...
## $ Year : Factor w/ 2 levels "2014","2015": 1 1 1 1 1 1 1 1 1 1 ...
## $ Ratings : num 6.3 7.1 6.2 6.3 4.7 4.6 6.1 7.1 6.5 6.1 ...
## $ Genre : Factor w/ 11 levels "1","2","3","4",..: 7 1 1 1 7 3 7 1 9 7 ...
## $ Gross : num 9.13e-03 1.92e+02 3.07e+01 1.06e+02 1.73e+01 2.90e-02 4.26e+01 5.75 2.60e+01 4.86e+01 ...
## $ Budget : num 4 50 28 110 3.5 0.5 40 20 28 12.5 ...
## $ Screens : num 45 3306 2872 3470 2310 ...
## $ Sequel : num 1 2 1 2 2 1 1 1 1 1 ...
## $ Sentiment : num 0 2 0 0 0 0 0 2 3 0 ...
## $ Views : num 3280543 583289 304861 452917 3145573 ...
## $ Likes : num 4632 3465 328 2429 12163 ...
## $ Dislikes : num 425 61 34 132 610 7 419 197 419 532 ...
## $ Comments : num 636 186 47 590 1082 ...
## $ Aggregate.Followers: num 1120000 12350000 483000 568000 1923800 ...
## $ Gross_log : num -4.7 5.26 3.42 4.66 2.85 ...
## Warning: Removed 46 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 46 rows containing non-finite values (stat_bin).
## Warning: Removed 46 rows containing non-finite values (stat_density).
## Warning in plot.window(...): "dendogram" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dendogram" is not a graphical parameter
## Warning in title(...): "dendogram" is not a graphical parameter
##
## Call:
## lm(formula = Gross_log ~ ., data = mdata_encoded)
##
## Residuals:
## ALL 187 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (21 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) -4.6962 NA NA
## Movie22 Jump Street 9.9537 NA NA
## Movie3 Days to Kill 8.1205 NA NA
## Movie300: Rise of an Empire 9.3596 NA NA
## MovieA Haunted House 2 7.5469 NA NA
## MovieA Million Ways to Die in the West 8.4480 NA NA
## MovieA Most Violent Year 6.4454 NA NA
## MovieA Walk Among the Tombstones 7.9543 NA NA
## MovieAbout Last Night 8.5798 NA NA
## MovieAmerican Sniper 10.5541 NA NA
## MovieAmerican Ultra 7.0850 NA NA
## MovieAmericons 2.6482 NA NA
## MovieAnd So It Goes 7.4175 NA NA
## MovieAnnabelle 9.1306 NA NA
## MovieAnnie 9.1494 NA NA
## MovieAtlas Shrugged: Who Is John Galt? 4.5099 NA NA
## MovieAvengers: Age of Ultron 10.8231 NA NA
## MovieBarefoot 0.2565 NA NA
## MovieBetter Living Through Chemistry 2.0693 NA NA
## MovieBeyond the Lights 7.3772 NA NA
## MovieBig Hero 6 10.0989 NA NA
## MovieBlack or White 7.7689 NA NA
## MovieBlackhat 6.6563 NA NA
## MovieBlended 8.5313 NA NA
## MovieBoyhood 7.9309 NA NA
## MovieBrick Mansions 7.7068 NA NA
## MovieCake 5.3221 NA NA
## MovieCaptain America: The Winter Soldier 10.2569 NA NA
## MovieDawn of the Planet of the Apes 10.0385 NA NA
## MovieDeliver Us from Evil 8.1139 NA NA
## MovieDevil's Due 7.4562 NA NA
## MovieDivergent 9.7135 NA NA
## MovieDolphin Tale 2 8.4339 NA NA
## MovieDope 7.5176 NA NA
## MovieDraft Day 8.0566 NA NA
## MovieDumb and Dumber To 9.1529 NA NA
## MovieEarth to Echo 8.3572 NA NA
## MovieEdge of Tomorrow 9.3014 NA NA
## MovieEndless Love 7.8489 NA NA
## MovieEntourage 8.1743 NA NA
## MovieFantastic Four 8.6608 NA NA
## MovieFifty Shades of Grey 9.8082 NA NA
## MovieFoxcatcher 4.8868 NA NA
## MovieFurious 7 10.5541 NA NA
## MovieFury 9.1470 NA NA
## MovieGet Hard 9.2004 NA NA
## MovieGod Help the Girl 2.4134 NA NA
## MovieGod's Not Dead 8.8038 NA NA
## MovieGodzilla 9.9995 NA NA
## MovieGone Girl 9.8202 NA NA
## MovieGuardians of the Galaxy 10.5043 NA NA
## MovieHappy Christmas 1.1930 NA NA
## MovieHercules 8.9825 NA NA
## MovieHitman: Agent 47 7.4750 NA NA
## MovieHome 9.8723 NA NA
## MovieHorrible Bosses 2 8.6926 NA NA
## MovieHot Pursuit 8.2371 NA NA
## MovieHot Tub Time Machine 2 7.2058 NA NA
## MovieHow to Train Your Dragon 2 9.8723 NA NA
## MovieIf I Stay 8.6182 NA NA
## MovieIn the Name of My Daughter 3.4052 NA NA
## MovieInside Out 10.5397 NA NA
## MovieInsurgent 9.5637 NA NA
## MovieInterstellar 9.9326 NA NA
## MovieInto the Storm 8.5590 NA NA
## MovieInto the Woods 9.5482 NA NA
## MovieJack Ryan: Shadow Recruit 8.6182 NA NA
## MovieJersey Boys 8.5463 NA NA
## MovieJupiter Ascending 8.5548 NA NA
## MovieJurassic World 11.1623 NA NA
## MovieKill the Messenger 5.5923 NA NA
## MovieKingsman: The Secret Service 9.5482 NA NA
## MovieLeft Behind 7.3352 NA NA
## MovieLet's Be Cops 9.1078 NA NA
## MovieLocker 13 -1.3073 NA NA
## MovieMad Max: Fury Road 9.7266 NA NA
## MovieMaggie 2.6636 NA NA
## MovieMagic Mike XXL 8.8843 NA NA
## MovieMaleficent 10.1810 NA NA
## MovieMaps to the Stars 3.6406 NA NA
## MovieMax 8.4315 NA NA
## MovieMe and Earl and the Dying Girl 6.6042 NA NA
## MovieMillion Dollar Arm 8.2908 NA NA
## MovieMinions 10.4800 NA NA
## MovieMission: Impossible - Rogue Nation 9.8379 NA NA
## MovieMoms' Night Out 7.0380 NA NA
## MovieMortdecai 6.7257 NA NA
## MovieMr. Peabody & Sherman 9.4147 NA NA
## MovieMuppets Most Wanted 8.6319 NA NA
## MovieNeed for Speed 8.4712 NA NA
## MovieNeighbors 9.7068 NA NA
## MovieNight at the Museum: Secret of the Tomb 9.4324 NA NA
## MovieNo Good Deed 8.6570 NA NA
## MovieNoah 9.3113 NA NA
## MovieNon-Stop 9.2114 NA NA
## MovieOuija 8.6241 NA NA
## MoviePaddington 9.0282 NA NA
## MoviePaper Towns 8.1430 NA NA
## MovieParanormal Activity: The Marked Ones 8.1774 NA NA
## MoviePaul Blart: Mall Cop 2 8.9589 NA NA
## MoviePenguins of Madagascar 9.1186 NA NA
## MoviePitch Perfect 2 9.9057 NA NA
## MoviePixels 8.9589 NA NA
## MoviePlanes: Fire & Rescue 8.7771 NA NA
## MoviePoltergeist 8.5548 NA NA
## MoviePompeii 7.8403 NA NA
## MovieProject Almanac 7.8008 NA NA
## MovieRicki and the Flash 7.8532 NA NA
## MovieRide Along 9.5940 NA NA
## MovieRio 2 9.5790 NA NA
## MovieRoad Hard 2.4519 NA NA
## MovieRoboCop 8.7669 NA NA
## MovieSabotage 7.0476 NA NA
## MovieSan Andreas 9.7331 NA NA
## MovieSelma 8.6494 NA NA
## MovieSeventh Son 7.5411 NA NA
## MovieSex Tape 8.3468 NA NA
## MovieSin City: A Dame to Kill For 7.3209 NA NA
## MovieSinister 2 7.6406 NA NA
## MovieSon of God 8.7855 NA NA
## MovieSong One 0.7941 NA NA
## MovieSpy 9.3967 NA NA
## MovieSt. Vincent 8.4826 NA NA
## MovieStraight Outta Compton 9.6015 NA NA
## MovieTaken 3 9.1882 NA NA
## MovieTammy 9.1329 NA NA
## MovieTed 2 9.0943 NA NA
## MovieTeenage Mutant Ninja Turtles 9.9485 NA NA
## MovieTerminator Genisys 9.1893 NA NA
## MovieThat Awkward Moment 7.9543 NA NA
## MovieThe Age of Adaline 8.4457 NA NA
## MovieThe Amazing Spider-Man 2 10.0094 NA NA
## MovieThe Best of Me 7.9846 NA NA
## MovieThe Book of Life 8.6122 NA NA
## MovieThe Boxtrolls 8.6241 NA NA
## MovieThe Boy Next Door 8.2629 NA NA
## MovieThe DUFF 8.2226 NA NA
## MovieThe Equalizer 9.3212 NA NA
## MovieThe Expendables 3 8.3674 NA NA
## MovieThe Fault in Our Stars 9.5245 NA NA
## MovieThe Gambler 8.2107 NA NA
## MovieThe Gift 8.2825 NA NA
## MovieThe Giver 8.5051 NA NA
## MovieThe Good Lie 5.6968 NA NA
## MovieThe Gunman 7.0570 NA NA
## MovieThe Hobbit: The Battle of the Five Armies 10.2375 NA NA
## MovieThe Homesman 5.5841 NA NA
## MovieThe Hundred-Foot Journey 8.6889 NA NA
## MovieThe Hunger Games: Mockingjay - Part 1 10.5163 NA NA
## MovieThe Interview 6.5061 NA NA
## MovieThe Judge 8.5485 NA NA
## MovieThe Lazarus Effect 7.9466 NA NA
## MovieThe Legend of Hercules 7.6300 NA NA
## MovieThe Lego Movie 10.2491 NA NA
## MovieThe Loft 6.4846 NA NA
## MovieThe Longest Ride 8.3179 NA NA
## MovieThe Maze Runner 9.3212 NA NA
## MovieThe Monuments Men 9.0529 NA NA
## MovieThe November Man 7.9151 NA NA
## MovieThe One I Love 4.0268 NA NA
## MovieThe Other Woman 9.1258 NA NA
## MovieThe Purge: Anarchy 8.9659 NA NA
## MovieThe Pyramid 5.7078 NA NA
## MovieThe Rover 4.8005 NA NA
## MovieThe SpongeBob Movie: Sponge Out of Water 9.7838 NA NA
## MovieThe Theory of Everything 8.2769 NA NA
## MovieThe Vatican Tapes 5.2327 NA NA
## MovieThe Water Diviner 6.1289 NA NA
## MovieThe Wedding Ringer 8.8629 NA NA
## MovieThe Woman in Black 2: Angel of Death 7.9733 NA NA
## MovieThink Like a Man Too 8.8736 NA NA
## MovieTomorrowland 9.2309 NA NA
## MovieTrainwreck 9.3501 NA NA
## MovieTranscendence 7.8317 NA NA
## MovieTransformers: Age of Extinction 10.1974 NA NA
## MovieTusk 5.2950 NA NA
## MovieUnbroken 9.4498 NA NA
## MovieVacation 8.7017 NA NA
## MovieVeronica Mars 5.8962 NA NA
## MovieWhen the Game Stands Tall 8.1007 NA NA
## MovieWhiplash 7.2688 NA NA
## MovieWild 8.3311 NA NA
## MovieWinter's Tale 0.9019 NA NA
## MovieWish I Was Here 5.9743 NA NA
## MovieWoman in Gold 8.2017 NA NA
## MovieX-Men: Days of Future Past 10.1515 NA NA
## MovieYves Saint Laurent 4.3635 NA NA
## Year2015 NA NA NA
## Ratings NA NA NA
## Genre2 NA NA NA
## Genre3 NA NA NA
## Genre6 NA NA NA
## Genre7 NA NA NA
## Genre8 NA NA NA
## Genre9 NA NA NA
## Genre10 NA NA NA
## Genre12 NA NA NA
## Genre15 NA NA NA
## Gross NA NA NA
## Budget NA NA NA
## Screens NA NA NA
## Sequel NA NA NA
## Sentiment NA NA NA
## Views NA NA NA
## Likes NA NA NA
## Dislikes NA NA NA
## Comments NA NA NA
## Aggregate.Followers NA NA NA
## Pr(>|t|)
## (Intercept) NA
## Movie22 Jump Street NA
## Movie3 Days to Kill NA
## Movie300: Rise of an Empire NA
## MovieA Haunted House 2 NA
## MovieA Million Ways to Die in the West NA
## MovieA Most Violent Year NA
## MovieA Walk Among the Tombstones NA
## MovieAbout Last Night NA
## MovieAmerican Sniper NA
## MovieAmerican Ultra NA
## MovieAmericons NA
## MovieAnd So It Goes NA
## MovieAnnabelle NA
## MovieAnnie NA
## MovieAtlas Shrugged: Who Is John Galt? NA
## MovieAvengers: Age of Ultron NA
## MovieBarefoot NA
## MovieBetter Living Through Chemistry NA
## MovieBeyond the Lights NA
## MovieBig Hero 6 NA
## MovieBlack or White NA
## MovieBlackhat NA
## MovieBlended NA
## MovieBoyhood NA
## MovieBrick Mansions NA
## MovieCake NA
## MovieCaptain America: The Winter Soldier NA
## MovieDawn of the Planet of the Apes NA
## MovieDeliver Us from Evil NA
## MovieDevil's Due NA
## MovieDivergent NA
## MovieDolphin Tale 2 NA
## MovieDope NA
## MovieDraft Day NA
## MovieDumb and Dumber To NA
## MovieEarth to Echo NA
## MovieEdge of Tomorrow NA
## MovieEndless Love NA
## MovieEntourage NA
## MovieFantastic Four NA
## MovieFifty Shades of Grey NA
## MovieFoxcatcher NA
## MovieFurious 7 NA
## MovieFury NA
## MovieGet Hard NA
## MovieGod Help the Girl NA
## MovieGod's Not Dead NA
## MovieGodzilla NA
## MovieGone Girl NA
## MovieGuardians of the Galaxy NA
## MovieHappy Christmas NA
## MovieHercules NA
## MovieHitman: Agent 47 NA
## MovieHome NA
## MovieHorrible Bosses 2 NA
## MovieHot Pursuit NA
## MovieHot Tub Time Machine 2 NA
## MovieHow to Train Your Dragon 2 NA
## MovieIf I Stay NA
## MovieIn the Name of My Daughter NA
## MovieInside Out NA
## MovieInsurgent NA
## MovieInterstellar NA
## MovieInto the Storm NA
## MovieInto the Woods NA
## MovieJack Ryan: Shadow Recruit NA
## MovieJersey Boys NA
## MovieJupiter Ascending NA
## MovieJurassic World NA
## MovieKill the Messenger NA
## MovieKingsman: The Secret Service NA
## MovieLeft Behind NA
## MovieLet's Be Cops NA
## MovieLocker 13 NA
## MovieMad Max: Fury Road NA
## MovieMaggie NA
## MovieMagic Mike XXL NA
## MovieMaleficent NA
## MovieMaps to the Stars NA
## MovieMax NA
## MovieMe and Earl and the Dying Girl NA
## MovieMillion Dollar Arm NA
## MovieMinions NA
## MovieMission: Impossible - Rogue Nation NA
## MovieMoms' Night Out NA
## MovieMortdecai NA
## MovieMr. Peabody & Sherman NA
## MovieMuppets Most Wanted NA
## MovieNeed for Speed NA
## MovieNeighbors NA
## MovieNight at the Museum: Secret of the Tomb NA
## MovieNo Good Deed NA
## MovieNoah NA
## MovieNon-Stop NA
## MovieOuija NA
## MoviePaddington NA
## MoviePaper Towns NA
## MovieParanormal Activity: The Marked Ones NA
## MoviePaul Blart: Mall Cop 2 NA
## MoviePenguins of Madagascar NA
## MoviePitch Perfect 2 NA
## MoviePixels NA
## MoviePlanes: Fire & Rescue NA
## MoviePoltergeist NA
## MoviePompeii NA
## MovieProject Almanac NA
## MovieRicki and the Flash NA
## MovieRide Along NA
## MovieRio 2 NA
## MovieRoad Hard NA
## MovieRoboCop NA
## MovieSabotage NA
## MovieSan Andreas NA
## MovieSelma NA
## MovieSeventh Son NA
## MovieSex Tape NA
## MovieSin City: A Dame to Kill For NA
## MovieSinister 2 NA
## MovieSon of God NA
## MovieSong One NA
## MovieSpy NA
## MovieSt. Vincent NA
## MovieStraight Outta Compton NA
## MovieTaken 3 NA
## MovieTammy NA
## MovieTed 2 NA
## MovieTeenage Mutant Ninja Turtles NA
## MovieTerminator Genisys NA
## MovieThat Awkward Moment NA
## MovieThe Age of Adaline NA
## MovieThe Amazing Spider-Man 2 NA
## MovieThe Best of Me NA
## MovieThe Book of Life NA
## MovieThe Boxtrolls NA
## MovieThe Boy Next Door NA
## MovieThe DUFF NA
## MovieThe Equalizer NA
## MovieThe Expendables 3 NA
## MovieThe Fault in Our Stars NA
## MovieThe Gambler NA
## MovieThe Gift NA
## MovieThe Giver NA
## MovieThe Good Lie NA
## MovieThe Gunman NA
## MovieThe Hobbit: The Battle of the Five Armies NA
## MovieThe Homesman NA
## MovieThe Hundred-Foot Journey NA
## MovieThe Hunger Games: Mockingjay - Part 1 NA
## MovieThe Interview NA
## MovieThe Judge NA
## MovieThe Lazarus Effect NA
## MovieThe Legend of Hercules NA
## MovieThe Lego Movie NA
## MovieThe Loft NA
## MovieThe Longest Ride NA
## MovieThe Maze Runner NA
## MovieThe Monuments Men NA
## MovieThe November Man NA
## MovieThe One I Love NA
## MovieThe Other Woman NA
## MovieThe Purge: Anarchy NA
## MovieThe Pyramid NA
## MovieThe Rover NA
## MovieThe SpongeBob Movie: Sponge Out of Water NA
## MovieThe Theory of Everything NA
## MovieThe Vatican Tapes NA
## MovieThe Water Diviner NA
## MovieThe Wedding Ringer NA
## MovieThe Woman in Black 2: Angel of Death NA
## MovieThink Like a Man Too NA
## MovieTomorrowland NA
## MovieTrainwreck NA
## MovieTranscendence NA
## MovieTransformers: Age of Extinction NA
## MovieTusk NA
## MovieUnbroken NA
## MovieVacation NA
## MovieVeronica Mars NA
## MovieWhen the Game Stands Tall NA
## MovieWhiplash NA
## MovieWild NA
## MovieWinter's Tale NA
## MovieWish I Was Here NA
## MovieWoman in Gold NA
## MovieX-Men: Days of Future Past NA
## MovieYves Saint Laurent NA
## Year2015 NA
## Ratings NA
## Genre2 NA
## Genre3 NA
## Genre6 NA
## Genre7 NA
## Genre8 NA
## Genre9 NA
## Genre10 NA
## Genre12 NA
## Genre15 NA
## Gross NA
## Budget NA
## Screens NA
## Sequel NA
## Sentiment NA
## Views NA
## Likes NA
## Dislikes NA
## Comments NA
## Aggregate.Followers NA
##
## Residual standard error: NaN on 0 degrees of freedom
## (44 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 186 and 0 DF, p-value: NA
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 0.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.0009
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.97
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.03
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
#MISSING DATA ANALYSIS
## Warning: package 'finalfit' was built under R version 3.5.2
## Warning: package 'mice' was built under R version 3.5.2
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following object is masked from 'package:tidyr':
##
## complete
## The following objects are masked from 'package:base':
##
## cbind, rbind
## Continuous
## label var_type n missing_n
## Ratings Ratings <dbl> 231 0
## Gross Gross <dbl> 231 0
## Budget Budget <dbl> 230 1
## Screens Screens <dbl> 221 10
## Sequel Sequel <dbl> 231 0
## Sentiment Sentiment <dbl> 231 0
## Views Views <dbl> 231 0
## Likes Likes <dbl> 231 0
## Dislikes Dislikes <dbl> 231 0
## Comments Comments <dbl> 231 0
## Aggregate.Followers Aggregate.Followers <dbl> 196 35
## missing_percent mean sd min quartile_25
## Ratings 0.0 6.4 1.0 3.1 5.8
## Gross 0.0 68.1 88.9 0.0 10.3
## Budget 0.4 47.9 54.3 0.1 9.0
## Screens 4.3 2209.2 1463.8 2.0 449.0
## Sequel 0.0 1.4 1.0 1.0 1.0
## Sentiment 0.0 2.8 7.0 -38.0 0.0
## Views 0.0 3712851.3 4511104.2 698.0 623302.0
## Likes 0.0 12732.5 28825.5 1.0 1776.5
## Dislikes 0.0 679.1 1243.9 0.0 105.5
## Comments 0.0 1825.7 3571.0 0.0 248.5
## Aggregate.Followers 15.2 3038193.4 4886278.1 1066.0 183025.0
## median quartile_75 max
## Ratings 6.5 7.1 8.7
## Gross 37.4 89.3 643.0
## Budget 28.0 65.0 250.0
## Screens 2777.0 3372.0 4324.0
## Sequel 1.0 1.0 7.0
## Sentiment 0.0 5.5 29.0
## Views 2409338.0 5217379.5 32626778.0
## Likes 6096.0 15247.5 370552.0
## Dislikes 341.0 697.5 13960.0
## Comments 837.0 2137.0 38363.0
## Aggregate.Followers 1052600.0 3694500.0 31030000.0
##
## Categorical
## label var_type n missing_n missing_percent levels_n levels
## Year Year <fct> 231 0 0.0 2 "2014", "2015"
## Genre Genre <fct> 231 0 0.0 11 -
## levels_count levels_percent
## Year 163, 68 71, 29
## Genre - -
## Year Ratings Genre Gross Sequel Sentiment Views Likes Dislikes
## 187 1 1 1 1 1 1 1 1 1
## 33 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## Comments Budget Screens Aggregate.Followers
## 187 1 1 1 1 0
## 33 1 1 1 0 1
## 8 1 1 0 1 1
## 2 1 1 0 0 2
## 1 1 0 1 1 1
## 0 1 10 35 46
## Movie Year Ratings Genre Gross Sequel Sentiment Views Likes Dislikes
## 187 1 1 1 1 1 1 1 1 1 1
## 33 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## Comments Gross_log Budget Screens Aggregate.Followers
## 187 1 1 1 1 1 0
## 33 1 1 1 1 0 1
## 8 1 1 1 0 1 1
## 2 1 1 1 0 0 2
## 1 1 1 0 1 1 1
## 0 0 1 10 35 46
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: package 'VIM' was built under R version 3.5.2
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:xts':
##
## first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
##
## Variables sorted by number of missings:
## Variable Count
## Aggregate.Followers 0.151515152
## Screens 0.043290043
## Budget 0.004329004
## Movie 0.000000000
## Year 0.000000000
## Ratings 0.000000000
## Genre 0.000000000
## Gross 0.000000000
## Sequel 0.000000000
## Sentiment 0.000000000
## Views 0.000000000
## Likes 0.000000000
## Dislikes 0.000000000
## Comments 0.000000000
## Gross_log 0.000000000
## Dependent is not a factor and will be treated as a continuous variable
## label levels Mean (sd)
## 1 Year 2014 61.0 (74.7)
## 2 2015 84.9 (115.1)
## 3 Ratings [3.1,8.7] 68.1 (88.9)
## 4 Genre 1 114.5 (121.8)
## 5 2 124.0 (98.6)
## 6 3 31.5 (45.2)
## 7 4 1.2 (NA)
## 8 6 25.3 (16.8)
## 9 7 0.0 (0.0)
## 10 8 43.5 (51.7)
## 11 9 45.2 (45.3)
## 12 10 23.7 (17.7)
## 13 12 134.8 (91.0)
## 14 15 29.4 (25.4)
## 15 Budget [0.07,250] 68.3 (89.0)
## 16 Missing 2.8 (NA)
## 17 Screens [2,4324] 70.8 (89.9)
## 18 Missing 7.1 (17.6)
## 19 Sequel [1,7] 68.1 (88.9)
## 20 Sentiment [-38,29] 68.1 (88.9)
## 21 Views [698,32626778] 68.1 (88.9)
## 22 Likes [1,370552] 68.1 (88.9)
## 23 Dislikes [0,13960] 68.1 (88.9)
## 24 Comments [0,38363] 68.1 (88.9)
## 25 Aggregate.Followers [1066,3.1e+07] 74.4 (93.0)
## 26 Missing 32.6 (48.6)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 35 rows containing non-finite values (stat_boxplot).
## Dependent is not a factor and will be treated as a continuous variable
## Dependent: Gross Mean (sd)
## 22 Year 2014 61.0 (74.7)
## 23 2015 84.9 (115.1)
## 17 Ratings [3.1,8.7] 68.1 (88.9)
## 5 Genre 1 114.5 (121.8)
## 9 2 124.0 (98.6)
## 10 3 31.5 (45.2)
## 11 4 1.2 (NA)
## 12 6 25.3 (16.8)
## 13 7 0.0 (0.0)
## 14 8 43.5 (51.7)
## 15 9 45.2 (45.3)
## 6 10 23.7 (17.7)
## 7 12 134.8 (91.0)
## 8 15 29.4 (25.4)
## 2 Budget [0.07,250] 68.3 (89.0)
## 18 Screens [2,4324] 70.8 (89.9)
## 20 Sequel [1,7] 68.1 (88.9)
## 19 Sentiment [-38,29] 68.1 (88.9)
## 21 Views [698,32626778] 68.1 (88.9)
## 16 Likes [1,370552] 68.1 (88.9)
## 4 Dislikes [0,13960] 68.1 (88.9)
## 3 Comments [0,38363] 68.1 (88.9)
## 1 Aggregate.Followers [1066,3.1e+07] 74.4 (93.0)
## Coefficient (univariable) Coefficient (multivariable)
## 22 - -
## 23 23.88 (-1.28 to 49.03, p=0.063) 3.70 (-18.42 to 25.81, p=0.742)
## 17 30.77 (19.77 to 41.77, p<0.001) 24.39 (13.02 to 35.77, p<0.001)
## 5 - -
## 9 9.55 (-40.26 to 59.36, p=0.706) -17.06 (-58.68 to 24.56, p=0.420)
## 10 -83.02 (-113.57 to -52.47, p<0.001) -24.01 (-54.27 to 6.26, p=0.119)
## 11 -113.29 (-273.03 to 46.46, p=0.164) -
## 12 -89.17 (-182.79 to 4.45, p=0.062) -3.92 (-93.41 to 85.57, p=0.931)
## 13 -114.49 (-228.30 to -0.68, p=0.049) 29.78 (-95.01 to 154.58, p=0.638)
## 14 -70.96 (-100.15 to -41.77, p<0.001) -15.00 (-43.99 to 13.98, p=0.308)
## 15 -69.34 (-117.50 to -21.17, p=0.005) -16.11 (-61.06 to 28.83, p=0.480)
## 6 -90.80 (-140.61 to -40.99, p<0.001) -26.14 (-72.48 to 20.19, p=0.267)
## 7 20.27 (-27.90 to 68.43, p=0.408) -9.47 (-49.89 to 30.96, p=0.645)
## 8 -85.14 (-138.99 to -31.29, p=0.002) 18.21 (-35.44 to 71.86, p=0.504)
## 2 1.18 (1.03 to 1.33, p<0.001) 0.69 (0.44 to 0.95, p<0.001)
## 18 0.04 (0.03 to 0.04, p<0.001) 0.01 (0.00 to 0.02, p=0.005)
## 20 38.94 (28.10 to 49.79, p<0.001) 8.24 (-1.99 to 18.46, p=0.114)
## 19 -0.22 (-1.87 to 1.44, p=0.796) -0.37 (-1.69 to 0.95, p=0.583)
## 21 0.00 (0.00 to 0.00, p=0.007) -0.00 (-0.00 to 0.00, p=0.263)
## 16 0.00 (-0.00 to 0.00, p=0.094) 0.00 (-0.00 to 0.00, p=0.203)
## 4 0.01 (0.00 to 0.02, p=0.014) 0.01 (0.00 to 0.03, p=0.035)
## 3 0.00 (-0.00 to 0.01, p=0.056) -0.00 (-0.01 to 0.00, p=0.400)
## 1 0.00 (0.00 to 0.00, p<0.001) 0.00 (0.00 to 0.00, p=0.009)
##
## iter imp variable
## 1 1 Budget Screens Aggregate.Followers
## 1 2 Budget Screens Aggregate.Followers
## 1 3 Budget Screens Aggregate.Followers
## 1 4 Budget Screens Aggregate.Followers
## 2 1 Budget Screens Aggregate.Followers
## 2 2 Budget Screens Aggregate.Followers
## 2 3 Budget Screens Aggregate.Followers
## 2 4 Budget Screens Aggregate.Followers
## 3 1 Budget Screens Aggregate.Followers
## 3 2 Budget Screens Aggregate.Followers
## 3 3 Budget Screens Aggregate.Followers
## 3 4 Budget Screens Aggregate.Followers
## 4 1 Budget Screens Aggregate.Followers
## 4 2 Budget Screens Aggregate.Followers
## 4 3 Budget Screens Aggregate.Followers
## 4 4 Budget Screens Aggregate.Followers
## 5 1 Budget Screens Aggregate.Followers
## 5 2 Budget Screens Aggregate.Followers
## 5 3 Budget Screens Aggregate.Followers
## 5 4 Budget Screens Aggregate.Followers
## # A tibble: 0 x 15
## # … with 15 variables: Movie <chr>, Gross <dbl>, Gross_log <dbl>,
## # Budget <dbl>, Year <fct>, Ratings <dbl>, Genre <fct>, Screens <dbl>,
## # Sequel <dbl>, Sentiment <dbl>, Views <dbl>, Likes <dbl>,
## # Dislikes <dbl>, Comments <dbl>, Aggregate.Followers <dbl>